* ------------------------------------------------------------------------------
* Calculate generalised indices and test for changes across cohorts
* ------------------------------------------------------------------------------


* Routine: create dataset of indices and test stats for each 2x2 submatrix
* ------------------------------------------------------------------------------

	cap program drop calcGenStruct
	program define calcGenStruct

	* calcGenStruct
	* -------------
	* Works on (and REPLACES) the data in memory.
	*
	* Expects on entry:
	*   - one row per (rep, coh), with coh coded 1 and 2
	*   - sorting-matrix cells Sm<i>w<j> for i,j = 1..$Ns (global Ns is set by
	*     the caller; the hard-coded cells below use levels 1 to 5)
	*   - rep==0 is the original sample, rep>0 are bootstrap replications
	*
	* Leaves in memory: one row per (rep, index), index = 1..16, holding
	*   dI    difference in the index between coh 2 and coh 1
	*   t_dI  |dI|/se for rep==0, |dI - dI(first obs)|/se for rep>0
	*
	* Side effect: globals se_dI1 ... se_dI16 (bootstrap sd of each difference).

	{

		* Calculate various indexes of assortativeness, for all cohorts and all
		* bootstrap replications

			* Scratch cells of the current 2x2 matrix. Temporary names cannot
			* clash with variables that already exist in the data; the storage
			* type is the session default, exactly as with a plain -gen-.
			tempvar a b c d sumS
			qui gen `a' = .
			qui gen `b' = .
			qui gen `c' = .
			qui gen `d' = .

			** Local indices

				* PG vs C and C vs SC (levels 4-5 and 3-4)
				* NOTE(review): here a is the diagonal cell of the LOWER level
				* (s1), whereas in the PG+C vs SC matrix below a collects the
				* HIGHER levels - confirm this orientation is intended.
				forvalues s1 = 3/4 {
				local s2 = `s1'+1

					* 2x2 sorting matrix
						qui replace `a' = Sm`s1'w`s1'
						qui replace `b' = Sm`s1'w`s2'
						qui replace `c' = Sm`s2'w`s1'
						qui replace `d' = Sm`s2'w`s2'
					* indices
						qui gen I1_`s1'`s2' = `a' / (`a'+`b')
						qui gen I2_`s1'`s2' = `a' / (`a'+`c')
						qui gen I3_`s1'`s2' = `d' / (`d'+`b')
						qui gen I4_`s1'`s2' = `d' / (`d'+`c')

				}

				* PG+C vs SC: 2x2 sorting matrix
					qui replace `a' = Sm4w4 + Sm5w5 + Sm4w5 + Sm5w4
					qui replace `b' = Sm5w3 + Sm4w3
					qui replace `c' = Sm3w5 + Sm3w4
					qui replace `d' = Sm3w3
				* indices
					qui gen I1_345 = `a' / (`a'+`b')
					qui gen I2_345 = `a' / (`a'+`c')
					qui gen I3_345 = `d' / (`d'+`b')
					qui gen I4_345 = `d' / (`d'+`c')


			** Global indices

				* sum of all elements in sorting matrix
					qui gen `sumS' = 0
					forvalues s1 = 1/$Ns {
					forvalues s2 = 1/$Ns {
						qui replace `sumS' = `sumS' + Sm`s1'w`s2'
					}
					}

				* 2x2 sorting matrix
				* a is deliberately NOT reassigned: it still holds the PG+C block
				* (levels 4-5 x 4-5) computed above; d is everything that is left.
					qui replace `b' = Sm5w3 + Sm4w3 + Sm5w2 + Sm4w2 + Sm5w1 + Sm4w1
					qui replace `c' = Sm3w5 + Sm3w4 + Sm2w5 + Sm2w4 + Sm1w5 + Sm1w4
					qui replace `d' = `sumS'-`a'-`b'-`c'
				* indices
					qui gen I1_12345 = `a' / (`a'+`b')
					qui gen I2_12345 = `a' / (`a'+`c')
					qui gen I3_12345 = `d' / (`d'+`b')
					qui gen I4_12345 = `d' / (`d'+`c')


		* clean & reshape data: one row per rep, indices suffixed _c1 / _c2

			qui keep rep coh I?_*
			qui rename I* I*_c
			qui reshape wide I*, i(rep) j(coh)
			qui sort rep


		** construct across cohort differences in indices and t-stats

			* ix runs 1..16 in the order of the two loops below; the value
			* labels defined further down rely on exactly this order.
			local ix = 0

			foreach s in 345 45 34 12345 {
			foreach I in I1_ I2_ I3_ I4_ {

				local ++ix

					* differences in indices
				qui gen dI`ix' = `I'`s'_c2 - `I'`s'_c1

					* calculate & store SE of difference (sd over bootstrap reps)
				qui sum dI`ix' if rep>0
				global se_dI`ix' = r(sd)

					* calculate t-stat for original sample
				qui gen t_dI`ix' = abs(dI`ix')/${se_dI`ix'} if rep==0

					* calculate t-stat for BS repetitions, centred on the first
					* observation (the rep==0 row after -sort rep-, assuming
					* rep has no negative values)
				qui replace t_dI`ix' = abs(dI`ix'-dI`ix'[1]) / ${se_dI`ix'} if rep>0

			}
			}

		** Clean and reshape data
		** ----------------------

			qui keep rep dI* t_dI*
			qui reshape long dI t_dI, i(rep) j(index)
			* -replace- keeps the routine re-runnable when a label named
			* "index" is already defined in the data
			label define index 1  "PG+C vs SC: a/(a+b)"      ///
							   2  "PG+C vs SC: a/(a+c)"      ///
							   3  "PG+C vs SC: d/(d+b)"      ///
							   4  "PG+C vs SC: d/(d+c)"      ///
							   5  "PG vs C: a/(a+b)"         ///
							   6  "PG vs C: a/(a+c)"         ///
							   7  "PG vs C: d/(d+b)"         ///
							   8  "PG vs C: d/(d+c)"         ///
							   9  "C vs SC: a/(a+b)"         ///
							   10 "C vs SC: a/(a+c)"         ///
							   11 "C vs SC: d/(d+b)"         ///
							   12 "C vs SC: d/(d+c)"         ///
							   13 "PG+C vs others: a/(a+b)"  ///
							   14 "PG+C vs others: a/(a+c)"  ///
							   15 "PG+C vs others: d/(d+b)"  ///
							   16 "PG+C vs others: d/(d+c)", replace
			label val index index

			label var dI    "difference in indices across cohorts"
			label var t_dI  "t-stat for diff across cohorts"
			label var index "index"

	}

	end


** Routine to calculate p-val
** -----------------------------------------------------------------------------

	cap program drop get_pval
	program define get_pval

	* get_pval
	* --------
	* Step-down loop over the test statistics held in memory (variables rep,
	* index, dI, t_dI). Reads globals mk (tag used in the result names),
	* nstat (number of statistics to process) and nrep (denominator of the
	* p-values). The data are preserved on entry and restored on exit.
	*
	* For every processed index ix the following globals are written:
	*   tdI<mk>_<ix>  t-stat in the first observation (largest remaining one
	*                 of the lowest rep)
	*   dI<mk>_<ix>   the corresponding difference
	*   se<mk>_<ix>   |dI| / t-stat
	*   ap<mk>_<ix>   share of rep>0 whose largest remaining t-stat exceeds it
	*   up<mk>_<ix>   share of rep>0 whose t-stat for the same index exceeds it
	*
	* NOTE(review): a missing t_dI compares as larger than any number, so such
	* rows would count as exceedances - confirm t_dI is never missing here.

	{

		qui preserve
		local mk = $mk
		di `mk'

		local step = 1
		while `step' <= $nstat {

			* within each rep, order the remaining statistics from largest to
			* smallest; n==1 then flags the per-rep maximum
			qui gsort rep -t_dI
			qui cap drop n
			qui by rep: gen n=_n

			* index sitting in the first observation and the suffix shared by
			* all result globals of this step
			local ix = index[1]
			local key "`mk'_`ix'"

			global tdI`key' = t_dI[1]
			global dI`key'  = dI[1]
			global se`key'  = abs(dI[1])/t_dI[1]

			* adjusted p-val: compare with the per-rep maximum
			qui count if rep>0 & n==1 & t_dI>${tdI`key'}
			global ap`key' = r(N)/$nrep

			* unadjusted p-val: compare with the same index only
			qui count if rep>0 & index==`ix' & t_dI>${tdI`key'}
			global up`key' = r(N)/$nrep

			* remove the processed index before the next step
			qui drop if index==`ix'

			local ++step

		}
		qui restore

	}

	end	


** calculate test statistics
** -----------------------------------------------------------------------------

{

	* Builds the test-statistic file for one pair of cohorts: loads the
	* bootstrap sorting matrices, keeps cohorts c1 and c2, recodes them to
	* 1 and 2, runs calcGenStruct and saves the result under a name built
	* from the l<c> labels (bsGenTests_70vs50.dta for c1 = 3, c2 = 5).

	* number of levels read by calcGenStruct when summing the sorting matrix
	global Ns = 5

	* label used in the file name for each cohort code
	local l1 "30"
	local l2 "40"
	local l3 "50"
	local l4 "60"
	local l5 "70"
	
	* cohort codes to compare
	local c1 = 3
	local c2 = 5

	* -clear- is the documented option of -use- for discarding data in memory
	qui use "$logfolder\bsSmx.dta", clear
	* detach the old value label before the codes are changed
	label drop coh
	qui keep if inlist(coh,`c1',`c2')
	qui recode coh (`c1'=1) (`c2'=2)	
	calcGenStruct
	qui save "$logfolder\bsGenTests_`l`c2''vs`l`c1''.dta", replace

}


* calculate p-values
** -----------------------------------------------------------------------------

{

	* Loads the test statistics saved for cohorts c1 / c2, computes p-values
	* with get_pval (once over all 16 indices, once within each group of 4)
	* and prints a table of differences and adjusted p-values.
	
	* label used in the file name for each cohort code
	local l1 "30"
	local l2 "40"
	local l3 "50"
	local l4 "60"
	local l5 "70"
	
	local c1 = 3
	local c2 = 5

	* -clear- is the documented option of -use- for discarding data in memory
	qui use "$logfolder\bsGenTests_`l`c2''vs`l`c1''.dta", clear
	global c1 = 1900+10*(`c1'+2)
	global c2 = 1900+10*(`c2'+2)

	* highest rep number is used by get_pval as denominator of the p-values
	qui sum rep
	global nrep = r(max)

	** p-values, all 16 indices together (results tagged 0)
	global nstat = 16
	global mk    = 0
	qui get_pval
	
	** p-values, each market separately (results tagged 1..4; market mk
	** covers indices 4*(mk-1)+1 ... 4*mk)
	global nstat = 4
	forvalues mk=1/4 {
		qui preserve
		qui keep if inrange(index, (`mk'-1)*$nstat+1, `mk'*$nstat)
		global mk=`mk'
		qui get_pval
		qui restore
	}

	* print estimates
	* per cell: difference, (adjusted p-val over all 16 indices),
	*           [adjusted p-val within the column's 4 indices]
	* The second row shows indices 2/6/10/14, i.e. a/(a+c).
	* NOTE(review): the heading mixes a 4-digit year (${c2}) with a 2-digit
	* one (`c'); global c1 is not used here - confirm which form is wanted.
	
	local c=10*(`c1'+2)
	di _newline ///
	"----------------------------------------------------", _newline ///
	"Cohort ${c2} vs `c'" _newline ///
	"Columns: C&PG vs SC,  PG vs C, C vs SC, C&PG vs others", _newline _newline ///
	"a/(a+b)    ",     %6.3f ${dI0_1},     "   ",     %6.3f ${dI0_5},     "   ",     %6.3f ${dI0_9},      "   ",     %6.3f ${dI0_13}    , _newline ///
    "           ", "(" %3.2f ${ap0_1} ")", "   ", "(" %3.2f ${ap0_5} ")", "   ", "(" %3.2f ${ap0_9}  ")", "   ", "(" %3.2f ${ap0_13} ")", _newline ///
    "           ", "[" %3.2f ${ap1_1} "]", "   ", "[" %3.2f ${ap2_5} "]", "   ", "[" %3.2f ${ap3_9}  "]", "   ", "[" %3.2f ${ap4_13} "]", _newline _newline ///
	"a/(a+c)    ",     %6.3f ${dI0_2},     "   ",     %6.3f ${dI0_6},     "   ",     %6.3f ${dI0_10},     "   ",     %6.3f ${dI0_14}    , _newline ///
    "           ", "(" %3.2f ${ap0_2} ")", "   ", "(" %3.2f ${ap0_6} ")", "   ", "(" %3.2f ${ap0_10} ")", "   ", "(" %3.2f ${ap0_14} ")", _newline ///
    "           ", "[" %3.2f ${ap1_2} "]", "   ", "[" %3.2f ${ap2_6} "]", "   ", "[" %3.2f ${ap3_10} "]", "   ", "[" %3.2f ${ap4_14} "]", _newline _newline ///
	"d/(d+b)    ",     %6.3f ${dI0_3},     "   ",     %6.3f ${dI0_7},     "   ",     %6.3f ${dI0_11},     "   ",     %6.3f ${dI0_15}    , _newline ///
    "           ", "(" %3.2f ${ap0_3} ")", "   ", "(" %3.2f ${ap0_7} ")", "   ", "(" %3.2f ${ap0_11} ")", "   ", "(" %3.2f ${ap0_15} ")", _newline ///
    "           ", "[" %3.2f ${ap1_3} "]", "   ", "[" %3.2f ${ap2_7} "]", "   ", "[" %3.2f ${ap3_11} "]", "   ", "[" %3.2f ${ap4_15} "]", _newline _newline ///
	"d/(d+c)    ",     %6.3f ${dI0_4},     "   ",     %6.3f ${dI0_8},     "   ",     %6.3f ${dI0_12},     "   ",     %6.3f ${dI0_16}    , _newline ///
    "           ", "(" %3.2f ${ap0_4} ")", "   ", "(" %3.2f ${ap0_8} ")", "   ", "(" %3.2f ${ap0_12} ")", "   ", "(" %3.2f ${ap0_16} ")", _newline ///
    "           ", "[" %3.2f ${ap1_4} "]", "   ", "[" %3.2f ${ap2_8} "]", "   ", "[" %3.2f ${ap3_12} "]", "   ", "[" %3.2f ${ap4_16} "]", _newline	///
	"----------------------------------------------------"
}
